<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<!-- 2023-04-16 Sun 23:18 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Coding</title>
<meta name="author" content="Guilherme Salome" />
<meta name="generator" content="Org Mode" />
<style>
  #content { max-width: 60em; margin: auto; }
  .title  { text-align: center;
             margin-bottom: .2em; }
  .subtitle { text-align: center;
              font-size: medium;
              font-weight: bold;
              margin-top:0; }
  .todo   { font-family: monospace; color: red; }
  .done   { font-family: monospace; color: green; }
  .priority { font-family: monospace; color: orange; }
  .tag    { background-color: #eee; font-family: monospace;
            padding: 2px; font-size: 80%; font-weight: normal; }
  .timestamp { color: #bebebe; }
  .timestamp-kwd { color: #5f9ea0; }
  .org-right  { margin-left: auto; margin-right: 0px;  text-align: right; }
  .org-left   { margin-left: 0px;  margin-right: auto; text-align: left; }
  .org-center { margin-left: auto; margin-right: auto; text-align: center; }
  .underline { text-decoration: underline; }
  #postamble p, #preamble p { font-size: 90%; margin: .2em; }
  p.verse { margin-left: 3%; }
  pre {
    border: 1px solid #e6e6e6;
    border-radius: 3px;
    background-color: #f2f2f2;
    padding: 8pt;
    font-family: monospace;
    overflow: auto;
    margin: 1.2em;
  }
  pre.src {
    position: relative;
    overflow: auto;
  }
  pre.src:before {
    display: none;
    position: absolute;
    top: -8px;
    right: 12px;
    padding: 3px;
    color: #555;
    background-color: #f2f2f299;
  }
  pre.src:hover:before { display: inline; margin-top: 14px;}
  /* Languages per Org manual */
  pre.src-asymptote:before { content: 'Asymptote'; }
  pre.src-awk:before { content: 'Awk'; }
  pre.src-authinfo::before { content: 'Authinfo'; }
  pre.src-C:before { content: 'C'; }
  /* pre.src-C++ doesn't work in CSS */
  pre.src-clojure:before { content: 'Clojure'; }
  pre.src-css:before { content: 'CSS'; }
  pre.src-D:before { content: 'D'; }
  pre.src-ditaa:before { content: 'ditaa'; }
  pre.src-dot:before { content: 'Graphviz'; }
  pre.src-calc:before { content: 'Emacs Calc'; }
  pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
  pre.src-fortran:before { content: 'Fortran'; }
  pre.src-gnuplot:before { content: 'gnuplot'; }
  pre.src-haskell:before { content: 'Haskell'; }
  pre.src-hledger:before { content: 'hledger'; }
  pre.src-java:before { content: 'Java'; }
  pre.src-js:before { content: 'Javascript'; }
  pre.src-latex:before { content: 'LaTeX'; }
  pre.src-ledger:before { content: 'Ledger'; }
  pre.src-lisp:before { content: 'Lisp'; }
  pre.src-lilypond:before { content: 'Lilypond'; }
  pre.src-lua:before { content: 'Lua'; }
  pre.src-matlab:before { content: 'MATLAB'; }
  pre.src-mscgen:before { content: 'Mscgen'; }
  pre.src-ocaml:before { content: 'Objective Caml'; }
  pre.src-octave:before { content: 'Octave'; }
  pre.src-org:before { content: 'Org mode'; }
  pre.src-oz:before { content: 'OZ'; }
  pre.src-plantuml:before { content: 'Plantuml'; }
  pre.src-processing:before { content: 'Processing.js'; }
  pre.src-python:before { content: 'Python'; }
  pre.src-R:before { content: 'R'; }
  pre.src-ruby:before { content: 'Ruby'; }
  pre.src-sass:before { content: 'Sass'; }
  pre.src-scheme:before { content: 'Scheme'; }
  pre.src-screen:before { content: 'Gnu Screen'; }
  pre.src-sed:before { content: 'Sed'; }
  pre.src-sh:before { content: 'shell'; }
  pre.src-sql:before { content: 'SQL'; }
  pre.src-sqlite:before { content: 'SQLite'; }
  /* additional languages in org.el's org-babel-load-languages alist */
  pre.src-forth:before { content: 'Forth'; }
  pre.src-io:before { content: 'IO'; }
  pre.src-J:before { content: 'J'; }
  pre.src-makefile:before { content: 'Makefile'; }
  pre.src-maxima:before { content: 'Maxima'; }
  pre.src-perl:before { content: 'Perl'; }
  pre.src-picolisp:before { content: 'Pico Lisp'; }
  pre.src-scala:before { content: 'Scala'; }
  pre.src-shell:before { content: 'Shell Script'; }
  /* hover label for ebnf2ps source blocks (label spelling matches the class name) */
  pre.src-ebnf2ps:before { content: 'ebnf2ps'; }
  /* additional language identifiers per "defun org-babel-execute"
       in ob-*.el */
  pre.src-cpp:before  { content: 'C++'; }
  pre.src-abc:before  { content: 'ABC'; }
  pre.src-coq:before  { content: 'Coq'; }
  pre.src-groovy:before  { content: 'Groovy'; }
  /* additional language identifiers from org-babel-shell-names in
     ob-shell.el: ob-shell is the only babel language using a lambda to put
     the execution function name together. */
  pre.src-bash:before  { content: 'bash'; }
  pre.src-csh:before  { content: 'csh'; }
  pre.src-ash:before  { content: 'ash'; }
  pre.src-dash:before  { content: 'dash'; }
  pre.src-ksh:before  { content: 'ksh'; }
  pre.src-mksh:before  { content: 'mksh'; }
  pre.src-posh:before  { content: 'posh'; }
  /* Additional Emacs modes also supported by the LaTeX listings package */
  pre.src-ada:before { content: 'Ada'; }
  pre.src-asm:before { content: 'Assembler'; }
  pre.src-caml:before { content: 'Caml'; }
  pre.src-delphi:before { content: 'Delphi'; }
  pre.src-html:before { content: 'HTML'; }
  pre.src-idl:before { content: 'IDL'; }
  pre.src-mercury:before { content: 'Mercury'; }
  pre.src-metapost:before { content: 'MetaPost'; }
  pre.src-modula-2:before { content: 'Modula-2'; }
  pre.src-pascal:before { content: 'Pascal'; }
  pre.src-ps:before { content: 'PostScript'; }
  pre.src-prolog:before { content: 'Prolog'; }
  pre.src-simula:before { content: 'Simula'; }
  pre.src-tcl:before { content: 'tcl'; }
  pre.src-tex:before { content: 'TeX'; }
  pre.src-plain-tex:before { content: 'Plain TeX'; }
  pre.src-verilog:before { content: 'Verilog'; }
  pre.src-vhdl:before { content: 'VHDL'; }
  pre.src-xml:before { content: 'XML'; }
  pre.src-nxml:before { content: 'XML'; }
  /* add a generic configuration mode; LaTeX export needs an additional
     (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
  pre.src-conf:before { content: 'Configuration File'; }

  table { border-collapse:collapse; }
  caption.t-above { caption-side: top; }
  caption.t-bottom { caption-side: bottom; }
  td, th { vertical-align:top;  }
  th.org-right  { text-align: center;  }
  th.org-left   { text-align: center;   }
  th.org-center { text-align: center; }
  td.org-right  { text-align: right;  }
  td.org-left   { text-align: left;   }
  td.org-center { text-align: center; }
  dt { font-weight: bold; }
  .footpara { display: inline; }
  .footdef  { margin-bottom: 1em; }
  .figure { padding: 1em; }
  .figure p { text-align: center; }
  .equation-container {
    display: table;
    text-align: center;
    width: 100%;
  }
  .equation {
    vertical-align: middle;
  }
  .equation-label {
    display: table-cell;
    text-align: right;
    vertical-align: middle;
  }
  .inlinetask {
    padding: 10px;
    border: 2px solid gray;
    margin: 10px;
    background: #ffffcc;
  }
  #org-div-home-and-up
   { text-align: right; font-size: 70%; white-space: nowrap; }
  textarea { overflow-x: auto; }
  .linenr { font-size: smaller }
  .code-highlighted { background-color: #ffff00; }
  .org-info-js_info-navigation { border-style: none; }
  #org-info-js_console-label
    { font-size: 10px; font-weight: bold; white-space: nowrap; }
  .org-info-js_search-highlight
    { background-color: #ffff00; color: #000000; font-weight: bold; }
  .org-svg { }
</style>
<script type="text/x-mathjax-config">
    // MathJax v2 configuration: centered display math, AMS-style equation
    // numbering with tags on the right, TeX fonts for the HTML-CSS and SVG
    // output processors.
    MathJax.Hub.Config({
        displayAlign: "center",
        displayIndent: "0em",

        // linebreaks.automatic must be the boolean false: the string "false"
        // is a non-empty string and therefore truthy in JavaScript, which
        // would enable automatic line breaking instead of disabling it.
        "HTML-CSS": { scale: 100,
                        linebreaks: { automatic: false },
                        webFont: "TeX"
                       },
        SVG: {scale: 100,
              linebreaks: { automatic: false },
              font: "TeX"},
        NativeMML: {scale: 100},
        TeX: { equationNumbers: {autoNumber: "AMS"},
               MultLineWidth: "85%",
               TagSide: "right",
               TagIndent: ".8em"
             }
});
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_HTML"></script>
</head>
<body>
<div id="content" class="content">
<h1 class="title">Coding</h1>
<div id="table-of-contents" role="doc-toc">
<h2>Table of Contents</h2>
<div id="text-table-of-contents" role="doc-toc">
<ul>
<li><a href="#org1506958">1. Loading and Cleaning Data&#xa0;&#xa0;&#xa0;<span class="tag"><span class="dependency">dependency</span></span></a></li>
<li><a href="#org9057142">2. Computing IQ4 for all Options&#xa0;&#xa0;&#xa0;<span class="tag"><span class="data">data</span></span></a></li>
<li><a href="#orgaf0003b">3. Merging Data with IQ4&#xa0;&#xa0;&#xa0;<span class="tag"><span class="data">data</span></span></a></li>
<li><a href="#org0cb593f">4. Volatility Signature for Options&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span>&#xa0;<span class="figure">figure</span></span></a></li>
<li><a href="#orgb2a499a">5. Volume-Volatility Regressions&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span></span></a>
<ul>
<li><a href="#orga98f0c7">5.1. Clustered Standard Errors&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span>&#xa0;<span class="figure">figure</span></span></a></li>
<li><a href="#org99665d8">5.2. Year by Year Estimates&#xa0;&#xa0;&#xa0;<span class="tag"><span class="robustness">robustness</span></span></a></li>
<li><a href="#org409b0de">5.3. Including BW and FEARS&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span></span></a></li>
<li><a href="#org410619d">5.4. Including IQ4&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span></span></a></li>
<li><a href="#orga4fa00e">5.5. Total Variance&#xa0;&#xa0;&#xa0;<span class="tag"><span class="robustness">robustness</span></span></a></li>
<li><a href="#org2e46c95">5.6. Total Variance + Clustered Standard Errors&#xa0;&#xa0;&#xa0;<span class="tag"><span class="robustness">robustness</span></span></a></li>
</ul>
</li>
<li><a href="#orge99a725">6. Non-Parametric Estimates&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span>&#xa0;<span class="figure">figure</span></span></a></li>
<li><a href="#org876b545">7. Volume-Volatility Elasticity and Disagreement&#xa0;&#xa0;&#xa0;<span class="tag"><span class="figure">figure</span></span></a></li>
</ul>
</div>
</div>
<div id="outline-container-org1506958" class="outline-2">
<h2 id="org1506958"><span class="section-number-2">1.</span> Loading and Cleaning Data&#xa0;&#xa0;&#xa0;<span class="tag"><span class="dependency">dependency</span></span></h2>
<div class="outline-text-2" id="text-1">
<div class="org-src-container">
<pre class="src src-jupyter-python">import pandas as pd
import numpy as np
from functools import partial
from pandas.tseries.offsets import MonthEnd
import os

# Folder with the input data sets, located under the user's home directory.
DATA_FOLDER: str = os.path.join(
    os.path.expanduser("~"), "Research", "Disagreement in Options", "Data"
)
# Folder for the paper's figures, located under the user's home directory.
FIGURES_FOLDER: str = os.path.join(
    os.path.expanduser("~"), "Research", "Disagreement in Options", "Paper", "figures"
)

def cleanData(df: pd.DataFrame) -&gt; pd.DataFrame:
    """Cleans data by adding a timestamp index, dropping trade volume outlier,
    selecting options with appropriate trade volume, tenor and moneyness. Also
    creates a rolling log(trade volume) for detrending.
    """
    # Create index with timestamps
    df["timestamp"] = df.date.apply(lambda x: x.normalize())
    df = df.set_index("timestamp").drop(columns=["date"])
    # Remove outlier with extreme trade volume
    df = df.drop(df.trade_volume.idxmax())
    # Clean data by selecting moneyness, volume and tenor
    clean = (
        (df.moneyness &gt;= -5)
        &amp; (df.moneyness &lt;= 3)
        &amp; (df.trade_volume &gt; 0.0)
        &amp; (df.tenor &lt;= 125)
    )
    cleaned_data = df[clean].copy()
    cleaned_data["log_volume"] = np.log(cleaned_data.trade_volume)
    # rolling average for volume
    grouped = cleaned_data.log_volume.groupby(cleaned_data.index).mean()
    cleaned_data["rolling_log_volume"] = grouped.rolling(
        window=252, min_periods=0
    ).mean()
    return cleaned_data


def loadOptionsData(filename, disagreement=True) -&gt; pd.DataFrame:
    """Loads options data on volume and volatility."""
    filename = os.path.join(DATA_FOLDER, filename)
    data = cleanData(pd.read_hdf(filename))
    # Bipower variances in log (units: daily variance)
    data["log_option_BV"] = np.log(data.option_BV)
    data["log_underlying_BV"] = np.log(data.underlying_BV)
    # Realized variances in log (units: daily variance)
    data["log_market_var"] = np.log(data.market_var)
    data["log_residual_var"] = np.log(data.resid_var)
    # Add disagreement measures if required
    if disagreement:
        data = loadDisagreement(data)
    return data


def loadBW2006() -&gt; pd.DataFrame:
    """Loads sentiment index from Baker, Wurgler 2006 stored in an Excel
    spreadsheet (obtained from Wurgler's website). The original series is
    monthly and here is resampled to daily frequency."""
    # Load sentiment data from BW2006
    bw = pd.read_excel(
        os.path.join(DATA_FOLDER, "BW2006data.xlsx"),
        sheet_name=1,
        header=0,
        index_col=0,
    )
    # Select dates with values
    bw = (
        bw[(bw.index &gt;= 200700) &amp; (bw.index &lt;= 201509)]
        .drop(columns=bw.columns.values[1:])
        .astype(np.float_)
    )
    bw.columns = ["BW"]
    # Fix timestamps
    bw["date"] = bw.index
    bw["date"] = bw.date.apply(
        lambda x: partial(pd.to_datetime, format="%Y%m")(x) + MonthEnd(1)
    )
    bw = bw.set_index("date")
    # Resample monthly data to daily data and backfill values
    return bw.resample("1D").asfreq().ffill()


def loadUMCS() -&gt; pd.DataFrame:
    """Loads the consumer sentiment index from University of Michigan (obtained
    from https://fred.stlouisfed.org/series/UMCSENT). The original series is
    monthly and here is resampled to daily frequency."""
    # Load sentiment data from UMichCS
    cs = pd.read_csv(os.path.join(DATA_FOLDER, "UMichCS.csv"), header=0, index_col=0)
    # Fix index to timestamp
    cs["date"] = cs.index
    cs["date"] = cs.date.apply(partial(pd.to_datetime, format="%Y-%m-%d"))
    cs.set_index("date", inplace=True)
    # Select matching dates
    cs = cs[(cs.index.year &gt;= 2007) &amp; (cs.index.year &lt;= 2016)].copy()
    # Rename column
    cs.rename(columns={"UMCSENT": "CS"}, inplace=True)
    # Resample monthly data to daily data and backfill values
    return cs.resample("1D").asfreq().ffill().astype(np.float_)


def loadFEARS() -&gt; pd.DataFrame:
    """Loads FEARS index from DEG2014, sourced from Author's website and
    communication with author. The original series is at the daily frequency."""
    # Load sentiment data from UMichCS
    fears = pd.read_hdf(os.path.join(DATA_FOLDER, "fears.h5"))
    fears.rename(columns={"fears30": "FEARS"}, inplace=True)
    fears.drop(columns=["fears20", "fears25", "fears35"], inplace=True)
    return fears[fears.index.year &gt;= 2007]


def loadSentiment(df: pd.DataFrame) -&gt; pd.DataFrame:
    """Loads all sentiment measures and standardize. Joins with supplied
    database."""
    # Load sentiments
    fears, cs, bw = loadFEARS(), loadUMCS(), loadBW2006()
    # Standardize sentiments
    fears = (fears - fears.mean()) / fears.std()
    cs = (cs - cs.mean()) / cs.std()
    bw = (bw - bw.mean()) / bw.std()
    # Join with data
    return df.join([fears, cs, bw])


def loadDisagreement(all_data: pd.DataFrame) -&gt; pd.DataFrame:
    """Adds disagreement measures to dataframe containing data."""

    # Disagreement Measures
    # FEARS
    # Source: Jia's email + author's website
    fears = pd.read_hdf(os.path.join(DATA_FOLDER, "fears.h5"))
    fears.rename(columns={"fears30": "fears"}, inplace=True)
    all_data = all_data.join(fears.fears)

    # EPU
    epu = pd.read_csv(os.path.join(DATA_FOLDER, "EPU.csv"), header=0)
    epu["date"] = epu.DATE.apply(partial(pd.to_datetime, format="%Y-%m-%d"))
    epu = epu.set_index("date")
    epu = epu.drop("DATE", axis=1)
    epu = epu.rename({"USEPUINDXD": "EPU"}, axis=1)
    all_data = all_data.join(epu)

    # Unemployment Forecast Dispersion
    dispersion = pd.read_hdf(os.path.join(DATA_FOLDER, "unemp_dispersion.h5"))
    all_data = all_data.join(dispersion)

    # Standardize Measures
    all_data["std_dispersion"] = (
        all_data.dispersion - all_data.dispersion.mean()
    ) / all_data.dispersion.std()
    all_data["std_EPU"] = (all_data.EPU - all_data.EPU.mean()) / all_data.EPU.std()
    all_data["std_fears"] = (
        all_data.fears - all_data.fears.mean()
    ) / all_data.fears.std()

    # EPU (MONTHLY)
    epu_monthly = pd.read_csv(os.path.join(DATA_FOLDER, "EPU_monthly.csv"), header=0)

    # Better column name
    epu_monthly.rename(columns={"USEPUINDXM": "EPU_monthly"}, inplace=True)
    # Set index use timestamp
    epu_monthly["date"] = epu_monthly.DATE.apply(
        partial(pd.to_datetime, format="%Y-%m-%d")
    )
    epu_monthly = epu_monthly.set_index("date")
    epu_monthly = epu_monthly.drop("DATE", axis=1)
    # Upsample to daily frequency
    epu_monthly = epu_monthly.resample("1D").asfreq().ffill()
    all_data = all_data.join(epu_monthly)

    # Standardize Monthly Measures
    all_data["std_EPU_monthly"] = (
        all_data.EPU_monthly - all_data.EPU_monthly.mean()
    ) / all_data.EPU_monthly.std()
    return all_data
</pre>
</div>
</div>
</div>
<div id="outline-container-org9057142" class="outline-2">
<h2 id="org9057142"><span class="section-number-2">2.</span> Computing IQ4 for all Options&#xa0;&#xa0;&#xa0;<span class="tag"><span class="data">data</span></span></h2>
<div class="outline-text-2" id="text-2">
<div class="org-src-container">
<pre class="src src-jupyter-python">import os
import pandas as pd
import numpy as np

# Path of the HDF5 input file (read key by key below), under the user's home directory
home = os.path.expanduser('~')
research = "Research/OptionsVolumeVolatility"
filepath = f"{home}/{research}/Data/volume-volatility-price.h5"


def getIQ4(row):
    """Compute the IQ4 statistic from the prices stored in one row.

    Every fifth element of ``row.price`` is kept; the statistic is the sum of
    the fourth powers of the log-price differences of that subsample, divided
    by ``3 * delta``, where ``delta`` is one over the number of sampled
    intervals. Presumably a realized-quarticity estimate; confirm against the
    paper.
    """
    assert 'price' in row.index, "Missing prices in row"
    sampled = row.price[::5]
    n_intervals = sampled.shape[0] - 1
    delta = 1/n_intervals
    log_returns = np.log(sampled).diff()
    fourth_powers = log_returns**4
    return fourth_powers.sum()/(3*delta)


# Compute IQ4 for every data set stored in the HDF5 file, keep only the
# columns needed for the later merge, and save the concatenated result.
dfs = []
with pd.HDFStore(filepath, mode='r') as hdf:
    for key in hdf.keys():
        print(f"Starting: {key}")
        df = hdf.select(key)
        df['IQ4'] = df.apply(getIQ4, axis=1)
        # Normalize the dates column-wise; a row-wise apply over the whole
        # frame is not needed to transform a single column.
        df['timestamp'] = df['date'].apply(lambda x: x.normalize())
        df = df.drop(labels=['price', 'option_BV',
                             'standard', 'underlying_BV', 'date'], axis=1)
        df = df.set_index('timestamp')
        dfs.append(df)
        print(f"Done: {key}")

data = pd.concat(dfs)
# Pass the key by keyword: positional arguments other than the path are
# deprecated in recent pandas releases.
data.to_hdf(f"{home}/{research}/Data/IQ4.h5", key="IQ4")
</pre>
</div>
</div>
</div>
<div id="outline-container-orgaf0003b" class="outline-2">
<h2 id="orgaf0003b"><span class="section-number-2">3.</span> Merging Data with IQ4&#xa0;&#xa0;&#xa0;<span class="tag"><span class="data">data</span></span></h2>
<div class="outline-text-2" id="text-3">
<p>
Load both the options data and the IQ4 data.
</p>
<div class="org-src-container">
<pre class="src src-jupyter-python">import os
import pandas as pd
import numpy as np

# Path of the IQ4 file under the user's home directory
home = os.path.expanduser('~')
research = "Research/OptionsVolumeVolatility"
filepath = f"{home}/{research}/Data/IQ4.h5"

# The options data are read through loadOptionsData, which looks the file up
# in DATA_FOLDER; the IQ4 data are read from filepath.
data = loadOptionsData('volume-volatility-delta-5.h5')
iq4 = pd.read_hdf(filepath)
</pre>
</div>

<p>
Merge the two data sets using the date, moneyness and tenor as the keys.
</p>
<div class="org-src-container">
<pre class="src src-jupyter-python">iq4 = iq4.reset_index()
# Turn the index into a regular column so it can be used as a merge key;
# the rename only has an effect if the index had no name.
data = data.reset_index()
data.rename(columns={'index': 'timestamp'}, inplace=True)

# Left merge: every row of the options data is kept, with IQ4 columns added
# where timestamp, moneyness and tenor all match.
merged = pd.merge(data, iq4,
                  on=['timestamp', 'moneyness', 'tenor'],
                  how='left')
merged.set_index('timestamp', inplace=True)

filename = 'volume-volatility-delta-5-iq4.h5'
# Pass the key by keyword: positional arguments other than the path are
# deprecated in recent pandas releases.
merged.to_hdf(f"{home}/{research}/Data/{filename}", key="data")
</pre>
</div>
</div>
</div>

<div id="outline-container-org0cb593f" class="outline-2">
<h2 id="org0cb593f"><span class="section-number-2">4.</span> Volatility Signature for Options&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span>&#xa0;<span class="figure">figure</span></span></h2>
<div class="outline-text-2" id="text-4">
<div class="org-src-container">
<pre class="src src-jupyter-python">import os
import pandas as pd

# Read the option RV surface from the user's home directory and apply the
# same cleaning as the other data sets
home = os.path.expanduser('~')
research = "Research/OptionsVolumeVolatility"
filepath = f"{home}/{research}/Data/option-RV-surface.h5"
data = cleanData(pd.read_hdf(filepath))
</pre>
</div>
<div class="org-src-container">
<pre class="src src-jupyter-python"># bin data by moneyness
moneyness_bins = [-5, -3, -0.5, 0.5, 3]
moneyness_cut = pd.cut(data.moneyness, bins=moneyness_bins)
# average RV for each moneyness category
select_RVs = [name for name in data.columns if name.startswith('option_RV')]
# observed=False is stated explicitly: the grouper is categorical, relying on
# the implicit default is deprecated in pandas, and all four bins must stay
# in the result so they match the four column labels assigned below.
avg_RV = data[select_RVs].groupby(moneyness_cut, observed=False).mean()
# annualize
annualized = 100*np.sqrt(252*avg_RV)
annualized = annualized.T
# one row per option_RV column, relabelled 1..20 (assumes 20 such columns)
annualized.index = range(1, 21)
annualized.columns = ['Deep OTM', 'OTM', 'ATM', 'ITM']
# create plot of RVs over the sampling frequency
ax = annualized.plot(figsize=(10, 6),
                     kind='line',
                     color='k',
                     style=["-", "--", "-.", ":"])
ax.grid(True, alpha=0.2)
ax.set_xticks(range(1, 21))
ax.set_ylabel(('Average Realized Volatility\n'
               ' (annualized - units of standard deviation per year, in %)'))
ax.set_xlabel('Sampling Frequency (in minutes)', labelpad=10)
ax.set_title('Realized Variance Signature for SPX Options')
ax.legend(loc='upper right', title='Moneyness Bins')
fig = ax.get_figure()
fig.show()
# fig.savefig(f"{home}/{research}/Paper/figures/signature_style.png", dpi=300)
</pre>
</div>
</div>
</div>

<div id="outline-container-orgb2a499a" class="outline-2">
<h2 id="orgb2a499a"><span class="section-number-2">5.</span> Volume-Volatility Regressions&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span></span></h2>
<div class="outline-text-2" id="text-5">
</div>
<div id="outline-container-orga98f0c7" class="outline-3">
<h3 id="orga98f0c7"><span class="section-number-3">5.1.</span> Clustered Standard Errors&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span>&#xa0;<span class="figure">figure</span></span></h3>
<div class="outline-text-3" id="text-5-1">
<div class="org-src-container">
<pre class="src src-jupyter-python">import os

import pandas as pd
import pyperclip
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

home = os.path.expanduser("~")
research = "Research/OptionsVolumeVolatility"
# NOTE(review): filepath is not used in this block; the data are read from
# DATA_FOLDER below. Confirm which location is the intended one.
filepath = f"{home}/{research}/Data/volume-volatility-delta-5-iq4.h5"

data = pd.read_hdf(os.path.join(DATA_FOLDER, "volume-volatility-delta-5-iq4.h5"))
# Join the standardized FEARS, CS and BW sentiment series
data = loadSentiment(data)
# Standardize BW again, now with the mean and standard deviation of the joined sample
data["std_BW"] = (data.BW - data.BW.mean()) / data.BW.std()
# Keep options with moneyness in [-5, 3] and tenor between 1 and 120 (inclusive)
trimmed = data[
    (data.moneyness &gt;= -5)
    &amp; (data.moneyness &lt;= 3)
    &amp; (data.tenor &lt;= 120)
    &amp; (data.tenor &gt;= 1)
].copy()
</pre>
</div>

<p>
We need to classify the data into clusters (groups) by moneyness and by tenor.
The assumption is that the errors of options with similar moneyness and tenor are correlated rather than i.i.d., so we need to adjust the standard errors to account for that. Otherwise, the OLS standard errors will be too small and the parameter estimates will appear more precise than they are.
We cut the moneyness data into bins, where each bin is identified by an integer.
We do the same for the tenors.
Then, we combine the two binnings: each (moneyness bin, tenor bin) pair defines one cluster, which is identified by a single integer.
</p>
<div class="org-src-container">
<pre class="src src-jupyter-python">import itertools

# Bin moneyness (unit-wide bins from -5 to 3) and tenor (5-day bins from 1 to
# 121); labels=False returns the integer position of the bin.
# include_lowest=True closes the first bin on the left, so observations with
# moneyness equal to -5 or tenor equal to 1 (both admitted by the trimming
# filter) get a bin instead of NaN. A NaN would break both the range() calls
# and the dictionary lookup below.
# moneyness_bins = [-5, -3, -0.5, 0.5, 3]
moneyness_bins = range(-5, 4, 1)
by_moneyness = pd.cut(
    trimmed.moneyness, bins=moneyness_bins, labels=False, include_lowest=True
)
# tenor_bins = [1, 7, 14, 21, 30, 60, 90, 120]
tenor_bins = range(1, 122, 5)
by_tenor = pd.cut(trimmed.tenor, bins=tenor_bins, labels=False, include_lowest=True)
# Every (moneyness bin, tenor bin) pair up to the largest observed bins
groups = list(
    itertools.product(range(by_moneyness.max() + 1), range(by_tenor.max() + 1))
)
# Map each pair to a single integer cluster id
groups_dict = {g: i for i, g in enumerate(groups)}
clusters = [groups_dict[(i, j)] for i, j in zip(by_moneyness, by_tenor)]
trimmed["clusters"] = clusters
</pre>
</div>

<p>
Run the volume-volatility regressions but use cluster-robust standard errors instead:
</p>
<div class="org-src-container">
<pre class="src src-jupyter-python">spec = (
    "I(log_volume - rolling_log_volume) ~ "
    "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + "
    "log_residual_var"
)
# Formula fragments that let the coefficient on log_residual_var vary with
# moneyness, tenor and their interaction
add_moneyness = "+ log_residual_var:(np.abs(moneyness))"
add_tenor = "+ log_residual_var:(np.log(tenor))"
add_interaction = " + log_residual_var:(np.abs(moneyness):np.log(tenor))"


def _fit_clustered(formula):
    """Fit ``formula`` by OLS on ``trimmed`` with cluster-robust covariance."""
    fitted = smf.ols(formula, data=trimmed).fit()
    return fitted.get_robustcov_results(
        cov_type="cluster", groups=trimmed["clusters"]
    )


# Nested specifications, from the baseline to the full interaction model
r0 = _fit_clustered(spec)
r1 = _fit_clustered(spec + add_moneyness)
r2 = _fit_clustered(spec + add_tenor)
r3 = _fit_clustered(spec + add_moneyness + add_tenor)
r4 = _fit_clustered(spec + add_moneyness + add_tenor + add_interaction)

# Regressors listed first in the summary table, in this order
reg_order = [
    "Intercept",
    "log_residual_var",
    "log_residual_var:np.abs(moneyness)",
    "log_residual_var:np.log(tenor)",
    "log_residual_var:np.abs(moneyness):np.log(tenor)",
    "log_residual_var:std_BW",
    "log_residual_var:std_fears",
]
# Extra statistics reported at the bottom of the table
inf_dict = {
    "Adjusted R-squared": lambda r: r.rsquared_adj,
    "Observations": lambda r: r.nobs,
}
fitted_models = [r0, r1, r2, r3, r4]
column_names = ["I", "II", "III", "IV", "V"]
summary = summary_col(
    fitted_models,
    float_format="%.4f",
    model_names=column_names,
    stars=True,
    regressor_order=reg_order,
    info_dict=inf_dict,
)
# Copy the LaTeX version to the clipboard and show the text version
pyperclip.copy(summary.as_latex())
print(summary)
</pre>
</div>


<pre class="example">
\begin{table}
  \centering
  \begin{tabular}{lccccc}
    &amp;  I &amp; II &amp; III &amp; IV &amp; V \\
    \toprule
    Baseline Estimates &amp;&amp;&amp;&amp;&amp;\\
    \quad Intercept ($\alpha_0$) &amp; 4.2690 &amp; 4.9748 &amp; 6.2295 &amp; 6.9463 &amp; 7.4682 \\
    &amp; (0.3366) &amp; (0.3105) &amp; (0.3873) &amp; (0.3765) &amp; (0.6330) \\
    \quad Elasticity ($\beta_0$) &amp; 0.2305 &amp; 0.5035 &amp; 0.6355 &amp; 0.9110 &amp; 1.0252 \\
    &amp; (0.0247) &amp; (0.0279) &amp; (0.0768) &amp; (0.0834) &amp; (0.1437) \\
    Explanatory Variables in Elasticity ($\beta_1$) &amp;&amp;&amp;&amp;&amp;\\
    \quad $\abs{\text{Moneyness}}$ &amp; &amp; -0.2031 &amp; &amp; -0.2035 &amp; -0.2893 \\
    &amp;  &amp; (0.0156) &amp; &amp; (0.0148) &amp; (0.0603) \\
    \quad $\ln{\text{Tenor}}$ &amp; &amp; &amp; -0.1176 &amp; -0.1182 &amp; -0.1509 \\
    &amp;  &amp; &amp; (0.0206) &amp; (0.0214) &amp; (0.0389) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$ &amp; &amp; &amp; &amp; &amp; 0.0247 \\
    &amp;  &amp; &amp; &amp; &amp; (0.0165) \\
    \midrule
    Adjusted $R^2$ (in \%) &amp; 22.52 &amp; 23.55 &amp; 22.95 &amp; 23.99 &amp; 24.01 \\
    Number of Observations &amp; \multicolumn{5}{c}{446053}\\
    \bottomrule
  \end{tabular}
\end{table}
</pre>


<pre class="example">
\begin{table}[H]
  \centering
  \begin{tabular}{lcccc}
    &amp;I&amp;II&amp;III&amp;IV\\
    \toprule
    Baseline Estimates &amp;&amp;&amp;&amp;\\
    \quad Intercept ($\alpha_0$) &amp; 4.2690 &amp; 4.9748 &amp; 6.9463 &amp; 7.4682 \\
                       &amp; (0.3366) &amp; (0.3105) &amp; (0.3765) &amp; (0.6330) \\
    \quad Elasticity ($\beta_0$) &amp; 0.2305 &amp; 0.5035 &amp; 0.9110 &amp; 1.0252 \\
                       &amp; (0.0247) &amp; (0.0279) &amp; (0.0834) &amp; (0.1437) \\
    Explanatory Variables in Elasticity ($\beta_1$) &amp;&amp;&amp;&amp;\\
    \quad $\abs{\text{Moneyness}}$ &amp; &amp; -0.2031 &amp; -0.2035 &amp; -0.2893 \\
                       &amp;  &amp; (0.0156) &amp; (0.0148) &amp; (0.0603) \\
    \quad $\ln{\text{Tenor}}$ &amp; &amp; &amp; -0.1182 &amp; -0.1509 \\
                       &amp;  &amp; &amp; (0.0214) &amp; (0.0389) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$&amp; &amp; &amp; &amp; 0.0247 \\
                       &amp;  &amp; &amp; &amp; (0.0165) \\
    \midrule
    Adjusted $R^2$ (in \%) &amp; 22.52 &amp; 23.55 &amp; 23.99 &amp; 24.01 \\
    Number of Observations &amp; \multicolumn{4}{c}{446053}\\
    \bottomrule
  \end{tabular}
\end{table}
</pre>


<p>
Plot the elasticity estimate for varying values of moneyness and tenor. Use the specification that includes the interaction term (<code>r4</code> in the code; column <code>V</code> of the five-column table and column <code>IV</code> of the four-column table above). This is the original plot, which I created before updating the standard errors to be cluster robust.
</p>
<div class="org-src-container">
<pre class="src src-jupyter-python">import itertools

from matplotlib.colors import LinearSegmentedColormap

# Grayscale colormap: all three channels run together from 0.0 (black) at the
# low end to 0.8 (light gray) at the high end.
cdict = {
    "red": ((0.0, 0.0, 0.0), (1.0, 0.8, 0.8)),
    "green": ((0.0, 0.0, 0.0), (1.0, 0.8, 0.8)),
    "blue": ((0.0, 0.0, 0.0), (1.0, 0.8, 0.8)),
}
better_gray = LinearSegmentedColormap("my_list", cdict)

# Evaluation grid: 100 moneyness values on [-5, 3] and seven tenors
ms = np.linspace(-5, 3, 100)
ts = [1, 10, 20, 30, 60, 90, 120]
vs = []
# Estimated coefficients of r4, keyed by regressor name
params = {key: value for key, value in zip(r4.model.exog_names, r4.params)}


def predict_elasticity(m, t):
    """Return the coefficient on log_residual_var implied by ``params`` at
    moneyness ``m`` and tenor ``t``: the baseline coefficient plus the
    |moneyness|, log(tenor) and |moneyness| * log(tenor) interaction terms."""
    return (
        params["log_residual_var"]
        + params["log_residual_var:np.abs(moneyness)"] * np.abs(m)
        + params["log_residual_var:np.log(tenor)"] * np.log(t)
        + params["log_residual_var:np.abs(moneyness):np.log(tenor)"]
        * np.abs(m)
        * np.log(t)
    )


# product() varies the tenor fastest, so reshaping to (len(ms), len(ts))
# gives one row per moneyness value and one column per tenor.
for m, t in itertools.product(ms, ts):
    vs.append(predict_elasticity(m, t))
betas = pd.DataFrame(index=ms, columns=ts, data=np.reshape(vs, (len(ms), len(ts))))

# One line per tenor, plotted against moneyness
ax = betas.plot(figsize=(10, 6), colormap=better_gray)
ax.set_title("Elasticity Estimate for Options at Different Moneyness and Tenor")
ax.set_xlabel("Moneyness\n (normalized - in units of standard deviations)")
ax.set_ylabel(r"Elasticity Estimate ($\beta_0$)")

# scatter average option
avg_moneyness = trimmed["moneyness"].mean()
avg_tenor = trimmed["tenor"].mean()
avg_option = ax.scatter(
    avg_moneyness,
    predict_elasticity(avg_moneyness, avg_tenor),
    label="Average Option",
    color="black",
)

# Dotted horizontal reference line at an elasticity of 1, starting at the
# third grid point
no_disag = ax.plot(
    ms[2:],
    np.array([1] * len(ms[2:])),
    color=[0.2, 0.2, 0.2],
    linestyle=":",
    label="No Disagreement",
)

ax.grid(True, alpha=0.2)
# Labels of every line except the last one drawn (the reference line), i.e.
# the tenor lines
labels = [line.get_label() for line in ax.get_lines()[:-1]]
leg1 = ax.legend(labels, title="Tenor (in days)", loc="upper left", framealpha=1)
# Second legend for the reference line and the average option; creating it
# replaces the axes' current legend, so leg1 is added back as an artist below.
leg2 = ax.legend(
    [ax.get_lines()[-1], avg_option],
    ["No Disagreement", "Average Option"],
    loc="upper left",
    framealpha=1,
    bbox_to_anchor=(0, 0.565)
)
artist = ax.add_artist(leg1)

ax.set_xlim([-5, 3])
ax.set_ylim(bottom=-0.6)

fig = ax.get_figure()
fig.subplots_adjust(bottom=0.2)
fig.savefig(
    os.path.expanduser(
        "~/Research/OptionsVolumeVolatility/Paper/figures/elasticity_estimate_parametric_upto120_style.png"
    ),
    dpi=300,
)
</pre>
</div>

<p>
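Use the specification without the interaction term (<code>r3</code> in the code; column <code>IV</code> of the five-column table and column <code>III</code> of the four-column table above), since the interaction term is insignificant.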
</p>
<div class="org-src-container">
<pre class="src src-jupyter-python">import itertools

from matplotlib.colors import LinearSegmentedColormap

cdict = {
    "red": ((0.0, 0.0, 0.0), (1.0, 0.8, 0.8)),
    "green": ((0.0, 0.0, 0.0), (1.0, 0.8, 0.8)),
    "blue": ((0.0, 0.0, 0.0), (1.0, 0.8, 0.8)),
}
better_gray = LinearSegmentedColormap("my_list", cdict)

ms = np.linspace(-5, 3, 100)
ts = [1, 10, 20, 30, 60, 90, 120]
vs = []
params = {key: value for key, value in zip(r3.model.exog_names, r3.params)}


def predict_elasticity(m, t):
    """Return the fitted elasticity at moneyness ``m`` and tenor ``t``.

    Adds the base ``log_residual_var`` coefficient to its interactions with
    absolute moneyness and log tenor, all read from the module-level
    ``params`` mapping. The moneyness-by-tenor interaction is not included.
    """
    base = params["log_residual_var"]
    moneyness_effect = params["log_residual_var:np.abs(moneyness)"] * np.abs(m)
    tenor_effect = params["log_residual_var:np.log(tenor)"] * np.log(t)
    return base + moneyness_effect + tenor_effect


# Evaluate the fitted elasticity at every (moneyness, tenor) pair of the grid.
for m, t in itertools.product(ms, ts):
    vs.append(predict_elasticity(m, t))
# product() varies the tenor fastest, so the reshape below yields one row per
# moneyness value and one column per tenor.
betas = pd.DataFrame(index=ms, columns=ts, data=np.reshape(vs, (len(ms), len(ts))))

# One line per tenor column, drawn with the gray colormap.
ax = betas.plot(figsize=(10, 6), colormap=better_gray)
ax.set_title("Elasticity Estimate for Options at Different Moneyness and Tenor")
ax.set_xlabel("Moneyness\n (normalized - in units of standard deviations)")
ax.set_ylabel(r"Elasticity Estimate ($\beta_0$)")

# Mark the option with sample-average moneyness and tenor (means over trimmed).
avg_moneyness = trimmed["moneyness"].mean()
avg_tenor = trimmed["tenor"].mean()
avg_option = ax.scatter(
    avg_moneyness,
    predict_elasticity(avg_moneyness, avg_tenor),
    label="Average Option",
    color="black",
)

# Dotted horizontal reference line at an elasticity of 1, drawn over ms[2:].
no_disag = ax.plot(
    ms[2:],
    np.array([1] * len(ms[2:])),
    color=[0.2, 0.2, 0.2],
    linestyle=":",
    label="No Disagreement",
)

ax.grid(True, alpha=0.2)
# Labels of every line except the last one plotted (the reference line).
labels = [line.get_label() for line in ax.get_lines()[:-1]]
leg1 = ax.legend(labels, title="Tenor (in days)", loc="upper left", framealpha=1)
# Second legend for the reference line and the average-option marker.
leg2 = ax.legend(
    [ax.get_lines()[-1], avg_option],
    ["No Disagreement", "Average Option"],
    loc="upper left",
    framealpha=1,
    bbox_to_anchor=(0, 0.565)
)
# Re-attach the first legend so both are shown; the second ax.legend() call
# would otherwise leave only leg2 on the axes.
artist = ax.add_artist(leg1)

ax.set_xlim([-5, 3])
ax.set_ylim(bottom=-0.6)

fig = ax.get_figure()
# Extra bottom margin, presumably to fit the two-line x-axis label.
fig.subplots_adjust(bottom=0.2)
# Write the figure into the paper's figures folder at 300 dpi.
fig.savefig(
    os.path.expanduser(
        "~/Research/OptionsVolumeVolatility/Paper/figures/elasticity_estimate_parametric_upto120_nointeraction_style.png"
    ),
    dpi=300,
)
</pre>
</div>
</div>
</div>

<div id="outline-container-org99665d8" class="outline-3">
<h3 id="org99665d8"><span class="section-number-3">5.2.</span> Year by Year Estimates&#xa0;&#xa0;&#xa0;<span class="tag"><span class="robustness">robustness</span></span></h3>
<div class="outline-text-3" id="text-5-2">
<div class="org-src-container">
<pre class="src src-jupyter-python">import itertools
import os

import pandas as pd
import pyperclip
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

home = os.path.expanduser("~")
research = "Research/OptionsVolumeVolatility"
filepath = f"{home}/{research}/Data/volume-volatility-delta-5-iq4.h5"

data = pd.read_hdf(filepath)
data = loadSentiment(data)
data["std_BW"] = (data.BW - data.BW.mean()) / data.BW.std()
trimmed = data[
    (data.moneyness &gt;= -5)
    &amp; (data.moneyness &lt;= 3)
    &amp; (data.tenor &lt;= 120)
    &amp; (data.tenor &gt;= 1)
].copy()


moneyness_bins = range(-5, 4, 1)
by_moneyness = pd.cut(trimmed.moneyness, bins=moneyness_bins, labels=False)
tenor_bins = range(1, 122, 5)
by_tenor = pd.cut(trimmed.tenor, bins=tenor_bins, labels=False)
groups = list(
    itertools.product(range(by_moneyness.max() + 1), range(by_tenor.max() + 1))
)
groups_dict = {g: i for g, i in zip(groups, range(len(groups)))}
clusters = [groups_dict[(i, j)] for i, j in zip(by_moneyness, by_tenor)]
trimmed["clusters"] = clusters
</pre>
</div>
<div class="org-src-container">
<pre class="src src-jupyter-python"># spec = (
#     "I(log_volume - rolling_log_volume) ~ "
#     "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + log_residual_var "
#     "+ log_residual_var:(np.abs(moneyness)) + log_residual_var:(np.log(tenor)) "
#     "+ log_residual_var:(np.abs(moneyness):np.log(tenor))"
# )
# Regression of detrended log volume on controls and on log_residual_var with
# its absolute-moneyness and log-tenor interactions (no triple interaction).
spec = (
    "I(log_volume - rolling_log_volume) ~ "
    "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + log_residual_var "
    "+ log_residual_var:(np.abs(moneyness)) + log_residual_var:(np.log(tenor)) "
)

# Starting years of the two-year windows: 2007, 2009, ..., 2015.
years = range(2007, 2017, 2)
res = []
for year in years:
    # Label-based slice of the index from `year` through `year + 1`
    # (assumes a date-like index - TODO confirm).
    df = trimmed.loc[str(year) : str(year + 1)]
    # NOTE(review): the other sections call plain .fit() before requesting
    # cluster-robust results; confirm that starting from an HC0 fit here is
    # intentional, since it may change the distribution used for p-values.
    model = (
        smf.ols(spec, data=df)
        .fit(cov_type="HC0")
        .get_robustcov_results(cov_type="cluster", groups=df["clusters"])
    )
    res.append(model)

# One column per window. regressor_order also names the triple interaction,
# which the active spec above does not contain.
summary = summary_col(
    res,
    float_format="%.4f",
    model_names=[f"{year}-{year+1}" for year in years],
    regressor_order=[
        "Intercept",
        "log_residual_var",
        "log_residual_var:np.abs(moneyness)",
        "log_residual_var:np.log(tenor)",
        "log_residual_var:np.abs(moneyness):np.log(tenor)",
    ],
    info_dict={
        "Adjusted R-squared": lambda r: r.rsquared_adj,
        "Observations": lambda r: f"{r.nobs:.0f}",
    },
    stars=True,
)
# Copy the LaTeX rendering to the clipboard and show the text table.
pyperclip.copy(summary.as_latex())
print(summary)
</pre>
</div>

\begin{table}
  \centering
  \begin{tabular}{lccccc}
    & 2007-2008 & 2009-2010 & 2011-2012 & 2013-2014 & 2015-2016 \\
    \toprule
    Baseline Estimates &&&&&\\
    \quad Intercept ($\alpha_0$) & 7.0531 & 6.3796 & 6.0686 & 7.8117 & 7.9675 \\
    & (0.4690) & (0.6267) & (0.3012) & (0.6000) & (0.4044) \\
    \quad Elasticity ($\beta_0$) & 1.4135 & 1.0215 & 1.2823 & 1.0060 & 0.8540 \\
    & (0.0985) & (0.1125) & (0.0907) & (0.1587) & (0.0794) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&&&\\
    \quad $\abs{\text{Moneyness}}$ & -0.3959 & -0.3124 & -0.3413 & -0.2950 & -0.1501 \\
    & (0.0336) & (0.0355) & (0.0325) & (0.0298) & (0.0139) \\
    \quad $\ln{\text{Tenor}}$& -0.1445 & -0.0774 & -0.1189 & -0.0673 & -0.1384 \\
    & (0.0235) & (0.0315) & (0.0205) & (0.0417) & (0.0198) \\
    \midrule
    Adjusted R-squared (in \%) & 26.52 & 25.38 & 23.58 & 35.78 & 24.58 \\
    Observations & 40850 & 30391 & 35109 & 72692 & 267011 \\
    \bottomrule
  \end{tabular}
\end{table}

\begin{table}
  \centering
  \begin{tabular}{lccccc}
    & 2007-2008 & 2009-2010 & 2011-2012 & 2013-2014 & 2015-2016 \\
    \toprule
    Baseline Estimates &&&&&\\
    \quad Intercept ($\alpha_0$) & 6.5674 & 7.0969 & 6.2563 & 8.0892 & 8.6087 \\
    & (0.5843) & (1.0657) & (0.3803) & (0.9272) & (0.6940) \\
    \quad Elasticity ($\beta_0$)& 1.3082 & 1.1773 & 1.3258 & 1.0694 & 0.9894 \\
    & (0.1328) & (0.2053) & (0.1154) & (0.2364) & (0.1467) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&&&\\
    \quad $\abs{\text{Moneyness}}$ & -0.2873 & -0.4516 & -0.3761 & -0.3437 & -0.2440 \\
    & (0.0763) & (0.0980) & (0.0534) & (0.1111) & (0.0573) \\
    \quad $\ln{\text{Tenor}}$ & -0.1146 & -0.1206 & -0.1330 & -0.0873 & -0.1770 \\
    & (0.0356) & (0.0586) & (0.0323) & (0.0668) & (0.0392) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$ & -0.0322 & 0.0393 & 0.0116 & 0.0156 & 0.0265 \\
    & (0.0228) & (0.0283) & (0.0183) & (0.0328) & (0.0155) \\
    \midrule
    Adjusted R-squared (in \%) & 26.54 & 25.42 & 23.58 & 35.78 & 24.61 \\
    Observations & 40850 & 30391 & 35109 & 72692 & 267011 \\
    \bottomrule
  \end{tabular}
\end{table}

\begin{table}[!ht]
  \centering
  \begin{tabular}{lcccccccccc}
    & 2007 & 2008 & 2009 & 2010 & 2011 & 2012 & 2013 & 2014 & 2015 & 2016 \\
    \toprule
    Baseline Estimates &&&&&&&&&&\\
    \quad Intercept ($\alpha_0$) & 6.6216 & 6.3872 & 8.4402 & 6.1228 & 4.9916 & 7.5096 & 8.2079 & 8.0573 & 7.7923 & 8.6270 \\
    & (0.2775) & (0.2501) & (0.3775) & (0.3123) & (0.1908) & (0.2307) & (0.2065) & (0.1771) & (0.1162) & (0.1003) \\
    \quad Elasticity ($\beta_0$) & 1.3058 & 1.2897 & 1.5393 & 0.9130 & 1.0370 & 1.5503 & 1.3508 & 0.9160 & 0.6791 & 1.1496 \\
    & (0.0709) & (0.0539) & (0.0734) & (0.0771) & (0.0495) & (0.0566) & (0.0500) & (0.0424) & (0.0261) & (0.0210) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&&&&&&&&\\
    \quad $\abs{\text{Moneyness}}$ & -0.2613 & -0.2386 & -0.5365 & -0.3957 & -0.3213 & -0.3494 & -0.3761 & -0.3033 & -0.2689 & -0.2571 \\
    & (0.0587) & (0.0399) & (0.0477) & (0.0510) & (0.0271) & (0.0357) & (0.0344) & (0.0258) & (0.0130) & (0.0115) \\
    \quad $\ln{\text{Tenor}}$ & -0.0864 & -0.1196 & -0.1499 & -0.0819 & -0.0413 & -0.2133 & -0.1584 & -0.0508 & -0.0426 & -0.2125 \\
    & (0.0194) & (0.0147) & (0.0203) & (0.0215) & (0.0143) & (0.0185) & (0.0176) & (0.0126) & (0.0072) & (0.0059) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$ & -0.0463 & -0.0337 & 0.0370 & 0.0326 & -0.0075 & 0.0108 & 0.0096 & 0.0071 & 0.0329 & 0.0220 \\
    & (0.0170) & (0.0121) & (0.0142) & (0.0152) & (0.0091) & (0.0121) & (0.0128) & (0.0081) & (0.0037) & (0.0032) \\
    \midrule
    Adjusted R-squared & 0.3256 & 0.2340 & 0.2430 & 0.2810 & 0.2179 & 0.2761 & 0.3859 & 0.3514 & 0.3372 & 0.2044 \\
    Observations & 15804 & 25046 & 18738 & 11653 & 20095 & 15014 & 22719 & 49973 & 112356 & 154655 \\
    \bottomrule
  \end{tabular}
\end{table}
</div>
</div>
<div id="outline-container-org409b0de" class="outline-3">
<h3 id="org409b0de"><span class="section-number-3">5.3.</span> Including BW and FEARS&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span></span></h3>
<div class="outline-text-3" id="text-5-3">
<div class="org-src-container">
<pre class="src src-jupyter-python">import os

import pandas as pd
import pyperclip
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

home = os.path.expanduser("~")
research = "Research/OptionsVolumeVolatility"
filepath = f"{home}/{research}/Data/volume-volatility-delta-5-iq4.h5"

data = pd.read_hdf(os.path.join(DATA_FOLDER, "volume-volatility-delta-5-iq4.h5"))
data = loadSentiment(data)
data["std_BW"] = (data.BW - data.BW.mean()) / data.BW.std()
trimmed = data[
    (data.moneyness &gt;= -5)
    &amp; (data.moneyness &lt;= 3)
    &amp; (data.tenor &lt;= 120)
    &amp; (data.tenor &gt;= 1)
].copy()
</pre>
</div>

<div class="org-src-container">
<pre class="src src-jupyter-python">import itertools

# Cluster ids: one integer per (moneyness bin, tenor bin) cell.
# include_lowest=True keeps rows sitting exactly on the lowest bin edge
# (moneyness == -5 or tenor == 1); without it pd.cut labels them NaN and the
# cluster lookup below fails.
# moneyness_bins = [-5, -3, -0.5, 0.5, 3]
moneyness_bins = range(-5, 4, 1)
by_moneyness = pd.cut(
    trimmed.moneyness, bins=moneyness_bins, labels=False, include_lowest=True
)
# tenor_bins = [1, 7, 14, 21, 30, 60, 90, 120]
tenor_bins = range(1, 122, 5)
by_tenor = pd.cut(trimmed.tenor, bins=tenor_bins, labels=False, include_lowest=True)
groups = list(
    itertools.product(range(by_moneyness.max() + 1), range(by_tenor.max() + 1))
)
# Number the (moneyness bin, tenor bin) pairs in product order.
groups_dict = {g: i for i, g in enumerate(groups)}
clusters = [groups_dict[(i, j)] for i, j in zip(by_moneyness, by_tenor)]
trimmed["clusters"] = clusters
</pre>
</div>

<div class="org-src-container">
<pre class="src src-jupyter-python"># spec = (
#     "I(log_volume - rolling_log_volume) ~ "
#     "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + "
#     "log_residual_var + log_residual_var:(np.abs(moneyness)) + "
#     "log_residual_var:(np.log(tenor)) + "
#     "log_residual_var:(np.abs(moneyness):np.log(tenor))"
# )
spec = (
    "I(log_volume - rolling_log_volume) ~ "
    "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + "
    "log_residual_var + log_residual_var:(np.abs(moneyness)) + "
    "log_residual_var:(np.log(tenor))"
)
# Extra terms for the BW measure.
add_BW = (
    "std_BW*(moneyness + np.abs(moneyness) + np.log(tenor))+"
    "log_residual_var:(std_BW)"
)
# Extra terms for the FEARS measure.
add_FEARS = (
    "std_fears*(moneyness + np.abs(moneyness) + np.log(tenor))+"
    "log_residual_var:(std_fears)"
)


def _fit_clustered(formula, frame):
    """Fit ``formula`` by OLS on ``frame`` with errors clustered on ``frame["clusters"]``."""
    fitted = smf.ols(formula, data=frame).fit()
    return fitted.get_robustcov_results(cov_type="cluster", groups=frame["clusters"])


# Every model is estimated on the rows where std_BW is available.
trimmed_subsample = trimmed.loc[~trimmed['std_BW'].isnull(), :]

# r0: no disagreement measures; r1: BW; r2: FEARS; r3: BW and FEARS.
r0 = _fit_clustered(spec, trimmed_subsample)
r1 = _fit_clustered(f"{spec} + {add_BW}", trimmed_subsample)
r2 = _fit_clustered(f"{spec}+{add_FEARS}", trimmed_subsample)
r3 = _fit_clustered(f"{spec}+{add_BW}+{add_FEARS}", trimmed_subsample)

# Row order of the coefficient table; names absent from a model are skipped.
reg_order = [
    "Intercept",
    "log_residual_var",
    "log_residual_var:np.abs(moneyness)",
    "log_residual_var:np.log(tenor)",
    "log_residual_var:np.abs(moneyness):np.log(tenor)",
    "log_residual_var:std_BW",
    "log_residual_var:std_fears",
]
# Footer rows, pre-formatted as strings. The backslash is escaped explicitly
# because "\%" is not a valid escape sequence in a normal string literal;
# the resulting text (backslash followed by percent sign) is unchanged.
inf_dict = {
    "Adjusted R-squared (in \\%)": lambda r: f"{100*r.rsquared_adj:.2f}",
    "Observations": lambda r: f"{r.nobs:.0f}",
}
summary = summary_col(
    [r0, r1, r2, r3],
    float_format="%.4f",
    model_names=["I", "II", "III", "IV"],
    stars=True,
    regressor_order=reg_order,
    info_dict=inf_dict,
)
# Copy the LaTeX rendering to the clipboard and show the text table.
pyperclip.copy(summary.as_latex())
print(summary)
</pre>
</div>


\begin{table}
  \centering
  \caption{}
  \begin{tabular}{lcccc}
    &  I &  II & III & IV \\
    \toprule
    Baseline Estimates &&&&\\
    \quad Elasticity ($\beta_0$) & 0.965 & 0.955 & 0.969 & 0.959 \\
    & (0.104) & (0.104) & (0.104) & (0.104) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&&\\
    \quad $\abs{\text{Moneyness}}$ & -0.237 & -0.240 & -0.240 & -0.242 \\
    & (0.020) & (0.020) & (0.020) & (0.020) \\
    \quad $\ln{\text{Tenor}}$ & -0.086 & -0.079 & -0.085 & -0.078 \\
    & (0.026) & (0.026) & (0.026) & (0.026) \\
    \quad BW & & 0.073 & & 0.072 \\
    &  & (0.014) & & (0.014) \\
    \quad FEARS & &  & -0.056 & -0.057 \\
    &  & & (0.011) & (0.011) \\
    \midrule
    Adjusted $R^2$ (in \%) & 26.44 & 26.76 & 26.47 & 26.80 \\
    Observations & \multicolumn{4}{c}{256453} \\
    \bottomrule
  \end{tabular}
\end{table}

\begin{table}
  \centering
  \begin{tabular}{lccc}
    &  I & II & III \\
    \toprule
    Baseline Estimates &&&\\
    \quad Intercept ($\alpha_0$) & 6.9463 & 6.4742 & 6.4783 \\
    & (0.3765) & (0.4201) & (0.4186) \\
    \quad Elasticity ($\beta_0$) & 0.9110 & 0.9552 & 0.9599 \\
    & (0.0834) & (0.1047) & (0.1045) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&\\
    \quad $\abs{\text{Moneyness}}$& -0.2035 & -0.2401 & -0.2429 \\
    & (0.0148) & (0.0204) & (0.0205) \\
    \quad $\ln{\text{Tenor}}$ & -0.1182 & -0.0797 & -0.0786 \\
    & (0.0214) & (0.0267) & (0.0265) \\
    \quad BW & & 0.0737 & 0.0720 \\
    &  & (0.0145) & (0.0142) \\
    \quad FEARS& & & -0.0575 \\
    &  & & (0.0111) \\
    \midrule
    Adjusted R-squared (in \%) & 23.99 & 26.76 & 26.80 \\
    Observations & 446053 & 256453 & 256453 \\
    \bottomrule
  \end{tabular}
\end{table}

\begin{table}
  \centering
  \begin{tabular}{lccc}
    &  I & II & III \\
    \toprule
    Baseline Estimates &&&\\
    \quad Intercept ($\alpha_0$) & 7.4682 & 7.4070 & 7.4013 \\
    & (0.6330) & (0.7186) & (0.7138) \\
    \quad Elasticity ($\beta_0$) & 1.0252 & 1.1691 & 1.1716 \\
    & (0.1437) & (0.1704) & (0.1697) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&\\
    \quad $\abs{\text{Moneyness}}$ & -0.2893 & -0.4070 & -0.4081 \\
    & (0.0603) & (0.0715) & (0.0712) \\
    \quad $\ln{\text{Tenor}}$ & -0.1509 & -0.1413 & -0.1396 \\
    & (0.0389) & (0.0459) & (0.0457) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$& 0.0247 & 0.0497 & 0.0491 \\
    & (0.0165) & (0.0204) & (0.0203) \\
    \quad BW & & 0.0732 & 0.0715 \\
    &  & (0.0144) & (0.0141) \\
    \quad FEARS & & & -0.0561 \\
    &  & & (0.0109) \\
    \midrule
    Adjusted R-squared (in \%) & 24.01 & 26.84 & 26.87 \\
    Observations & 446053 & 256453 & 256453 \\
    \bottomrule
  \end{tabular}
\end{table}

\begin{table}
  \centering
  \begin{tabular}{lccc}
    \toprule
    Baseline Estimates &&&\\
    \quad Intercept ($\alpha_0$) & 7.4682 & 7.4070 & 7.4013 \\
                       & (0.0568) & (0.0723) & (0.0722) \\
    \quad Elasticity ($\beta_0$) & 1.0252 & 1.1691 & 1.1716 \\
                       & (0.0126) & (0.0168) & (0.0168) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&\\
    \quad $\abs{\text{Moneyness}}$ & -0.2893 & -0.4070 & -0.4081 \\
                       & (0.0072) & (0.0100) & (0.0100) \\
    \quad $\ln{\text{Tenor}}$ & -0.1509 & -0.1413 & -0.1396 \\
                       & (0.0036) & (0.0048) & (0.0048) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$ & 0.0247 & 0.0497 & 0.0491 \\
                       & (0.0021) & (0.0030) & (0.0030) \\
    \quad BW & & 0.0732 & 0.0715 \\
                       &  & (0.0051) & (0.0051) \\
    \quad FEARS & & & -0.0561 \\
                       &  & & (0.0056) \\
    \midrule
    Adjusted R-squared & 0.2401 & 0.2684 & 0.2687 \\
    Observations & 446053 & \multicolumn{2}{c}{256453}\\
    \bottomrule
  \end{tabular}
\end{table}
</div>
</div>
<div id="outline-container-org410619d" class="outline-3">
<h3 id="org410619d"><span class="section-number-3">5.4.</span> Including IQ4&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span></span></h3>
<div class="outline-text-3" id="text-5-4">
<div class="org-src-container">
<pre class="src src-jupyter-python">import itertools
import os

import pandas as pd
import pyperclip
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

home = os.path.expanduser("~")
research = "Research/OptionsVolumeVolatility"
filepath = f"{home}/{research}/Data/volume-volatility-delta-5-iq4.h5"

data = pd.read_hdf(filepath)
data = loadSentiment(data)
data["std_BW"] = (data.BW - data.BW.mean()) / data.BW.std()
data["std_IQ4"] = (data.IQ4 - data.IQ4.mean()) / data.IQ4.std()
trimmed = data[
    (data.moneyness &gt;= -5)
    &amp; (data.moneyness &lt;= 3)
    &amp; (data.tenor &lt;= 120)
    &amp; (data.tenor &gt;= 1)
]

# create cluster groups
moneyness_bins = range(-5, 4, 1)
by_moneyness = pd.cut(trimmed.moneyness, bins=moneyness_bins, labels=False)
tenor_bins = range(1, 122, 5)
by_tenor = pd.cut(trimmed.tenor, bins=tenor_bins, labels=False)
groups = list(
    itertools.product(range(by_moneyness.max() + 1), range(by_tenor.max() + 1))
)
groups_dict = {g: i for g, i in zip(groups, range(len(groups)))}
clusters = [groups_dict[(i, j)] for i, j in zip(by_moneyness, by_tenor)]
trimmed["clusters"] = clusters
</pre>
</div>

<div class="org-src-container">
<pre class="src src-jupyter-python"># spec = (
#     "I(log_volume - rolling_log_volume) ~ "
#     "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + "
#     "log_residual_var + log_residual_var:(np.abs(moneyness)) + "
#     "log_residual_var:(np.log(tenor)) + "
#     "log_residual_var:(np.abs(moneyness):np.log(tenor))"
# )

spec = (
    "I(log_volume - rolling_log_volume) ~ "
    "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + "
    "log_residual_var + log_residual_var:(np.abs(moneyness)) + "
    "log_residual_var:(np.log(tenor))"
)
# Extra terms for each disagreement measure.
add_BW = (
    "std_BW*(moneyness + np.abs(moneyness) + np.log(tenor))+"
    "log_residual_var:(std_BW)"
)
add_FEARS = (
    "std_fears*(moneyness + np.abs(moneyness) + np.log(tenor))+"
    "log_residual_var:(std_fears)"
)
add_IQ4 = (
    "std_IQ4*(moneyness + np.abs(moneyness) + np.log(tenor))+"
    "log_residual_var:(std_IQ4)"
)


def _fit_clustered(formula, frame):
    """Fit ``formula`` by OLS on ``frame`` with errors clustered on ``frame["clusters"]``."""
    fitted = smf.ols(formula, data=frame).fit()
    return fitted.get_robustcov_results(cov_type="cluster", groups=frame["clusters"])


# Rows where std_BW is available; used by every model except the baseline.
trimmed_subsample = trimmed.loc[~trimmed["std_BW"].isnull(), :]

# r0: baseline on the full trimmed sample; r1 adds BW; r2 adds BW and FEARS;
# r3 adds BW, FEARS and IQ4.
r0 = _fit_clustered(spec, trimmed)
r1 = _fit_clustered(f"{spec} + {add_BW}", trimmed_subsample)
r2 = _fit_clustered(f"{spec}+{add_BW}+{add_FEARS}", trimmed_subsample)
r3 = _fit_clustered(f"{spec}+{add_BW}+{add_FEARS}+{add_IQ4}", trimmed_subsample)
# Row order of the coefficient table; names absent from a model are skipped.
reg_order = [
    "Intercept",
    "log_residual_var",
    "log_residual_var:np.abs(moneyness)",
    "log_residual_var:np.log(tenor)",
    "log_residual_var:np.abs(moneyness):np.log(tenor)",
    "log_residual_var:std_BW",
    "log_residual_var:std_fears",
    "log_residual_var:std_IQ4",
]
# Footer rows. The observation count is rendered as a whole number, matching
# the other summary cells, instead of being passed through as a raw float.
inf_dict = {
    "Adjusted R-squared": lambda r: r.rsquared_adj,
    "Observations": lambda r: f"{r.nobs:.0f}",
}
summary = summary_col(
    [r0, r1, r2, r3],
    float_format="%.4f",
    model_names=["I", "II", "III", "IV"],
    stars=True,
    regressor_order=reg_order,
    info_dict=inf_dict,
)
# Copy the LaTeX rendering to the clipboard and show the text table.
pyperclip.copy(summary.as_latex())
print(summary)
</pre>
</div>

\begin{table}
\centering
\begin{tabular}{lcccc}
  &  I &  II & III & IV \\
  \toprule
  \quad Intercept ($\alpha_0$) & 6.9463 & 6.4742 & 6.4783 & 6.4702 \\
  & (0.3765) & (0.4201) & (0.4186) & (0.4190) \\
  \quad Elasticity ($\beta_0$) & 0.9110 & 0.9552 & 0.9599 & 0.9588 \\
  & (0.0834) & (0.1047) & (0.1045) & (0.1044) \\
  Explanatory Variables for Elasticity ($\beta_1$) &&&&\\
  \quad $\abs{\text{Moneyness}}$ & -0.2035 & -0.2401 & -0.2429 & -0.2438 \\
  & (0.0148) & (0.0204) & (0.0205) & (0.0206) \\
  \quad $\ln{\text{Tenor}}$& -0.1182 & -0.0797 & -0.0786 & -0.0782 \\
  & (0.0214) & (0.0267) & (0.0265) & (0.0266) \\
  \quad BW& & 0.0737 & 0.0720 & 0.0722 \\
  &  & (0.0145) & (0.0142) & (0.0142) \\
  \quad FEARS & &  & -0.0575 & -0.0575 \\
  &  & & (0.0111) & (0.0111) \\
  \quad IQ4 & &  & & -0.0140 \\
  &  & &  & (0.0036) \\
  \midrule
  Adjusted R-squared (in \%) & 23.99 & 26.76 & 26.80 & 26.80 \\
  Observations & 446053 & \multicolumn{3}{c}{256453}\\
  \bottomrule
\end{tabular}
\end{table}

\begin{table}
  \centering
  \begin{tabular}{lcccc}
    \toprule
    \quad Intercept ($\alpha_0$) & 7.4682 & 7.4070 & 7.4013 & 7.4056 \\
                                 & (0.0568) & (0.0723) & (0.0722) & (0.0725) \\
    \quad Elasticity ($\beta_0$) & 1.0252 & 1.1691 & 1.1716 & 1.1733 \\
                                 & (0.0126) & (0.0168) & (0.0168) & (0.0168) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&&\\
    \quad $\abs{\text{Moneyness}}$ & -0.2893 & -0.4070 & -0.4081 & -0.4118 \\
                                 & (0.0072) & (0.0100) & (0.0100) & (0.0100) \\
    \quad $\ln{\text{Tenor}}$ & -0.1509 & -0.1413 & -0.1396 & -0.1399 \\
                                 & (0.0036) & (0.0048) & (0.0048) & (0.0048) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$& 0.0247 & 0.0497 & 0.0491 & 0.0498 \\
                                 & (0.0021) & (0.0030) & (0.0030) & (0.0030) \\
    \quad BW & & 0.0732 & 0.0715 & 0.0717 \\
                                 &  & (0.0051) & (0.0051) & (0.0051) \\
    \quad FEARS & & & -0.0561 & -0.0560 \\
                                 &  & & (0.0056) & (0.0056) \\
    \quad IQ4 & & & & -0.0125 \\
                                 &  & & & (0.0029) \\
    \midrule
    Adjusted R-squared & 0.2401 & 0.2684 & 0.2687 & 0.2688 \\
    Observations & 446053 & \multicolumn{3}{c}{256453}\\
    \bottomrule
  \end{tabular}
\end{table}
</div>
</div>

<div id="outline-container-orga4fa00e" class="outline-3">
<h3 id="orga4fa00e"><span class="section-number-3">5.5.</span> Total Variance&#xa0;&#xa0;&#xa0;<span class="tag"><span class="robustness">robustness</span></span></h3>
<div class="outline-text-3" id="text-5-5">
<div class="org-src-container">
<pre class="src src-jupyter-python">import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
import pyperclip

# Load the options data set through the project helper loadOptionsData,
# which is defined outside this cell.
data = loadOptionsData('volume-volatility-delta-5.h5')
</pre>
</div>

<div class="org-src-container">
<pre class="src src-jupyter-python">trimmed = data[(data.moneyness &gt;= -5) &amp; (data.moneyness &lt;= 3) &amp;
               (data.tenor &lt;= 120) &amp; (data.tenor &gt;= 1)].copy()
# Base formula; the trailing space is part of the original string.
spec = (
    "I(log_volume - rolling_log_volume) ~ "
    "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + log_option_BV "
)
# Each formula extends the previous one by a single interaction term.
_with_moneyness = spec + " + log_option_BV:(np.abs(moneyness))"
_with_tenor = _with_moneyness + " + log_option_BV:(np.log(tenor))"
_with_interaction = (
    _with_tenor + " + log_option_BV:(np.abs(moneyness):np.log(tenor))"
)

# All four models are fit by OLS with HC0 standard errors.
r0, r1, r2, r3 = (
    smf.ols(formula, data=trimmed).fit(cov_type="HC0")
    for formula in (spec, _with_moneyness, _with_tenor, _with_interaction)
)

summary = summary_col(
    [r0, r1, r2, r3],
    float_format="%.4f",
    model_names=["Base", "Moneyness", "Tenor", "All"],
    stars=True,
    regressor_order=[
        "Intercept",
        "log_option_BV",
        "log_option_BV:np.abs(moneyness)",
        "log_option_BV:np.log(tenor)",
        "log_option_BV:np.abs(moneyness):np.log(tenor)",
    ],
    info_dict={"Adjusted R-squared": lambda r: r.rsquared_adj},
)
# Copy the LaTeX rendering to the clipboard and show the text table.
pyperclip.copy(summary.as_latex())
print(summary)
</pre>
</div>

\begin{table}[ht]
  \centering
  \begin{tabular}{lcccc}
    \toprule
    Baseline &&&&\\
    \quad Intercept ($\alpha_0$) & 3.6663*** & 3.7457*** & 5.3555*** & 5.8422*** \\
             & (0.0204) & (0.0203) & (0.0259) & (0.0351) \\
    \quad Elasticity ($\beta_0$) & 0.0528*** & 0.3589*** & 1.1170*** & 1.3379*** \\
             & (0.0049) & (0.0079) & (0.0119) & (0.0165) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&&\\
    \quad $\abs{\text{Moneyness}}$ & & -0.2176*** & -0.2196*** & -0.3834*** \\
             &  & (0.0041) & (0.0041) & (0.0091) \\
    \quad $\ln{\text{Tenor}}$ & & & -0.2079*** & -0.2695*** \\
             &  & & (0.0026) & (0.0043) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$ & & & & 0.0459*** \\
             &  & & & (0.0025) \\
    \midrule
    Adjusted R-squared & 0.2171 & 0.2227 & 0.2325 & 0.2330 \\
    \bottomrule
  \end{tabular}
\end{table}
</div>
</div>
<div id="outline-container-org2e46c95" class="outline-3">
<h3 id="org2e46c95"><span class="section-number-3">5.6.</span> Total Variance + Clustered Standard Errors&#xa0;&#xa0;&#xa0;<span class="tag"><span class="robustness">robustness</span></span></h3>
<div class="outline-text-3" id="text-5-6">
<div class="org-src-container">
<pre class="src src-jupyter-python">import itertools
import os

import pandas as pd
import pyperclip
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

home = os.path.expanduser("~")
research = "Research/OptionsVolumeVolatility"
filepath = f"{home}/{research}/Data/volume-volatility-delta-5-iq4.h5"

data = pd.read_hdf(os.path.join(DATA_FOLDER, "volume-volatility-delta-5-iq4.h5"))
data = loadSentiment(data)
data["std_BW"] = (data.BW - data.BW.mean()) / data.BW.std()
trimmed = data[
    (data.moneyness &gt;= -5)
    &amp; (data.moneyness &lt;= 3)
    &amp; (data.tenor &lt;= 120)
    &amp; (data.tenor &gt;= 1)
].copy()

moneyness_bins = range(-5, 4, 1)
by_moneyness = pd.cut(trimmed.moneyness, bins=moneyness_bins, labels=False)
tenor_bins = range(1, 122, 5)
by_tenor = pd.cut(trimmed.tenor, bins=tenor_bins, labels=False)
groups = list(
    itertools.product(range(by_moneyness.max() + 1), range(by_tenor.max() + 1))
)
groups_dict = {g: i for g, i in zip(groups, range(len(groups)))}
clusters = [groups_dict[(i, j)] for i, j in zip(by_moneyness, by_tenor)]
trimmed["clusters"] = clusters
</pre>
</div>

<p>
Run the regression but display the cluster-robust standard errors.
</p>
<div class="org-src-container">
<pre class="src src-jupyter-python">spec = (
    "I(log_volume - rolling_log_volume) ~ "
    "1 + (moneyness + np.abs(moneyness))*np.log(tenor) + log_option_BV "
)
# Optional interaction terms appended to the base formula in spec.
add_moneyness = " + log_option_BV:(np.abs(moneyness))"
add_tenor = " + log_option_BV:(np.log(tenor))"
add_interaction = " + log_option_BV:(np.abs(moneyness):np.log(tenor))"


def _fit_clustered(formula, frame):
    """Fit ``formula`` by OLS on ``frame`` with errors clustered on ``frame["clusters"]``."""
    fitted = smf.ols(formula, data=frame).fit()
    return fitted.get_robustcov_results(cov_type="cluster", groups=frame["clusters"])


# r0: base; r1: moneyness; r2: tenor; r3: moneyness and tenor;
# r4: moneyness, tenor and their joint interaction.
r0, r1, r2, r3, r4 = (
    _fit_clustered(spec + extra_terms, trimmed)
    for extra_terms in (
        "",
        add_moneyness,
        add_tenor,
        add_moneyness + add_tenor,
        add_moneyness + add_tenor + add_interaction,
    )
)

# One column per model. The observation count is rendered as a whole number,
# matching the other summary cells, instead of a raw float.
summary = summary_col(
    [r0, r1, r2, r3, r4],
    float_format="%.4f",
    model_names=["I", "II", "III", "IV", "V"],
    stars=True,
    regressor_order=[
        "Intercept",
        "log_option_BV",
        "log_option_BV:np.abs(moneyness)",
        "log_option_BV:np.log(tenor)",
        "log_option_BV:np.abs(moneyness):np.log(tenor)",
    ],
    info_dict={
        "Adjusted R-squared": lambda r: r.rsquared_adj,
        "Number of Observations": lambda r: f"{r.nobs:.0f}",
    },
)
# Copy the LaTeX rendering to the clipboard and show the text table.
pyperclip.copy(summary.as_latex())
print(summary)
</pre>
</div>

\begin{table}
\caption{}
\begin{center}
  \begin{tabular}{lccccc}
      & I & II & III & IV & V \\
      \toprule
      Baseline &&&&&\\
      \quad Elasticity ($\beta_0$) & 0.0528 & 0.3589 & 0.8038 & 1.1170 & 1.3379 \\
      & (0.0279) & (0.0519) & (0.0977) & (0.1047) & (0.1956) \\
      Explanatory Variables for Elasticity ($\beta_1$) &&&&&\\
      \quad $\abs{\text{Moneyness}}$ & & -0.2176 & & -0.2196 & -0.3834 \\
      &  & (0.0251) & & (0.0205) & (0.0827) \\
      \quad $\ln{\text{Tenor}}$ & & & -0.2067 & -0.2079 & -0.2695 \\
      &  & & (0.0266) & (0.0274) & (0.0535) \\
      \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$ & & & & & 0.0459 \\
      &  & & & & (0.0225) \\
      \midrule
      Adjusted $R^2$ (in \%) & 21.71 & 22.27 & 22.67 & 23.25 & 23.30 \\
      Number of Observations & \multicolumn{5}{c}{446053}\\
      \bottomrule
    \end{tabular}
\end{center}
\end{table}

\begin{table}
  \centering
  \begin{tabular}{lcccc}
    \toprule
    Baseline &&&&\\
    \quad Intercept ($\alpha_0$) & 3.6663 & 3.7457 & 5.3555 & 5.8422 \\
             & (0.3258) & (0.3004) & (0.3294) & (0.5431) \\
    \quad Elasticity ($\beta_0$) & 0.0528 & 0.3589 & 1.1170 & 1.3379 \\
             & (0.0279) & (0.0519) & (0.1047) & (0.1956) \\
    Explanatory Variables for Elasticity ($\beta_1$) &&&&\\
    \quad $\abs{\text{Moneyness}}$& & -0.2176 & -0.2196 & -0.3834 \\
             &  & (0.0251) & (0.0205) & (0.0827) \\
    \quad $\ln{\text{Tenor}}$ & & & -0.2079 & -0.2695 \\
             &  & & (0.0274) & (0.0535) \\
    \quad $\abs{\text{Moneyness}}\cdot\ln{\text{Tenor}}$ & & & & 0.0459 \\
             &  & & & (0.0225) \\
    \midrule
    Adjusted R-squared & 0.2171 & 0.2227 & 0.2325 & 0.2330 \\
    Number of Observations & \multicolumn{4}{c}{446053}\\
    \bottomrule
  \end{tabular}
\end{table}
</div>
</div>
</div>
<div id="outline-container-orge99a725" class="outline-2">
<h2 id="orge99a725"><span class="section-number-2">6.</span> Non-Parametric Estimates&#xa0;&#xa0;&#xa0;<span class="tag"><span class="results">results</span>&#xa0;<span class="figure">figure</span></span></h2>
<div class="outline-text-2" id="text-6">
<div class="org-src-container">
<pre class="src src-jupyter-python">import os

import pandas as pd
import pyperclip
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

home = os.path.expanduser("~")
research = "Research/OptionsVolumeVolatility"
filepath = f"{home}/{research}/Data/volume-volatility-delta-5-iq4.h5"

data = pd.read_hdf(os.path.join(DATA_FOLDER, "volume-volatility-delta-5-iq4.h5"))
data = loadSentiment(data)
data["std_BW"] = (data.BW - data.BW.mean()) / data.BW.std()
trimmed = data[
    (data.moneyness &gt;= -5)
    &amp; (data.moneyness &lt;= 3)
    &amp; (data.tenor &lt;= 120)
    &amp; (data.tenor &gt;= 1)
].copy()
</pre>
</div>

<div class="org-src-container">
<pre class="src src-jupyter-python">from typing import Tuple


def vvRegressionNoControl(df: pd.DataFrame) -&gt; Tuple[float, ...]:
    """Regression between detrended volume and two mesaures of variance."""
    specification = "I(log_volume - rolling_log_volume) ~ " "log_residual_var"
    res = smf.ols(specification, data=df).fit(cov_type="HC0")
    stderr = res.cov_HC1.diagonal() ** 0.5
    return (
        f"{res.params.Intercept:.2f}",
        f"{stderr[0]:.4f}",
        f"{res.params.log_residual_var:.2f}",
        f"{stderr[1]:.4f}",
        f"{res.rsquared_adj:.4f}",
        f"{res.nobs:.0f}",
    )
</pre>
</div>

<div class="org-src-container">
<pre class="src src-jupyter-python"># Bin options by moneyness and tenor
# Bin edges; consecutive edges define the groups used in the regressions.
moneyness_bins = [-5, -4, -3, -2.5, -2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2, 3]
tenor_bins = [1, 20, 40, 60, 80, 100, 120]  # CHANGED TO START AT 1 DAY
# NOTE(review): pd.cut builds right-closed intervals by default, so rows with
# moneyness == -5 or tenor == 1 fall outside every bin and are dropped by the
# groupby below. Confirm whether include_lowest=True is intended.
m_bins = pd.cut(trimmed.moneyness, bins=moneyness_bins)
t_bins = pd.cut(trimmed.tenor, bins=tenor_bins)
# Run volume-volatility regressions, one per (moneyness bin, tenor bin) group
results = trimmed.groupby([m_bins, t_bins]).apply(vvRegressionNoControl)
# Expand each returned tuple into one column per reported statistic.
results = results.apply(pd.Series)
results.columns = [
    "intercept",
    "stderr_intercept",
    "residual",
    "stderr_residual",
    "rsquared",
    "total_obs",
]
# Slope estimates with moneyness bins as rows and tenor bins as columns.
# The builtin float is used because the np.float_ alias was removed in
# NumPy 2.0; both map to float64.
np_elasticity = results["residual"].unstack(level="tenor").astype(float)
# Index each row by the midpoint of its moneyness bin.
np_elasticity.index = [
    (a + b) / 2 for a, b in zip(moneyness_bins[:-1], moneyness_bins[1:])
]
np_elasticity.columns = [str(v) for v in np_elasticity.columns.values]
</pre>
</div>

<div class="org-src-container">
<pre class="src src-jupyter-python">from matplotlib.colors import LinearSegmentedColormap

# Grayscale ramp from black (0.0) to light gray (0.8), identical in the three
# colour channels.
gray_ramp = ((0.0, 0.0, 0.0), (1.0, 0.8, 0.8))
cdict = {channel: gray_ramp for channel in ("red", "green", "blue")}
better_gray = LinearSegmentedColormap("my_list", cdict)

# One line per column of np_elasticity, drawn against its index.
ax = np_elasticity.plot(figsize=(10, 6), colormap=better_gray)
ax.set(
    title="Elasticity Estimates (Fixing Tenor Groups)",
    xlabel="Moneyness\n (normalized - in units of standard deviations)",
    ylabel=r"Elasticity Estimate ($\hat{\beta}_0$)",
)
ax.grid(True, alpha=0.2)
# Shrink the axes to 80% of their width so the legend fits on the right.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(
    title="Tenor Interval (in days)",
    loc="upper left",
    bbox_to_anchor=(1, 1.02),
    framealpha=1,
)
# ax.set_xlim([-5, 3])
ax.set_ylim(bottom=-0.4, top=1)
fig = ax.get_figure()
fig.subplots_adjust(bottom=0.2, right=0.8)
figure_path = os.path.expanduser(
    "~/Research/OptionsVolumeVolatility/Paper/figures/elasticity_nonparametric_style.png"
)
fig.savefig(figure_path, dpi=300)

</pre>
</div>


<div id="org41bfbf5" class="figure">
<p><img src="./.ob-jupyter/ead7eaa4d690741d7e447d4860cbe6c2d67ae8e3.png" alt="Line plot of non-parametric elasticity estimates against normalized moneyness, with one line per tenor interval" />
</p>
</div>


<div class="org-src-container">
<pre class="src src-jupyter-python">moneyness_labels = []
for m_low, m_up in zip(moneyness_bins[:-1], moneyness_bins[1:]):
    moneyness_labels.append(f'({m_low},{m_up}]')
tenor_labels = []
for t_low, t_up in zip(tenor_bins[:-1], tenor_bins[1:]):
    tenor_labels.append(f'({t_low},{t_up}]')
</pre>
</div>
<div class="org-src-container">
<pre class="src src-jupyter-python">import matplotlib.pyplot as plt
from scipy.stats import norm

# Slope estimates and their standard errors, with moneyness bins as rows and
# tenor bins as columns. The builtin float is used because the np.float_
# alias was removed in NumPy 2.0; both map to float64.
residual = results.residual.unstack().astype(float)
stderr_residual = results.stderr_residual.unstack().astype(float)
# Two-sided normal confidence band at level 1 - alpha (99%).
alpha = 0.01
cv = norm.ppf(1 - alpha / 2)
ub = residual + cv * stderr_residual
lb = residual - cv * stderr_residual

# One panel per moneyness bin, each showing the estimate across tenor bins.
fig, axes = plt.subplots(
    nrows=1, ncols=len(residual.index), figsize=(16, 6), sharey=True
)
for i, ax in enumerate(axes):
    residual.T.iloc[:, [i]].plot(ax=ax, legend=False, color="black")
    # Shade the band using the x positions of the line just drawn.
    ax.fill_between(
        ax.lines[0].get_xdata(),
        ub.T.iloc[:, [i]].values.ravel(),
        lb.T.iloc[:, [i]].values.ravel(),
        color=ax.lines[0].get_color(),
        alpha=0.2,
    )
    ax.grid(True)
    ax.set_xlabel(None)
    # NOTE(review): ticks sit at tenor-bin positions 0, 1, 3 and 5 while the
    # labels are day counts; confirm the labels match the bin edges.
    ax.set_xticks([0, 1, 3, 5])
    ax.set_xlim(left=0, right=5)
    ax.set_xticklabels(["1", "30", "60", "120"], rotation=0, fontsize=8)
    ax.set_title(moneyness_labels[i])
axes[0].set_ylabel(r"Elasticity Estimate ($\hat{\beta}_0$)", fontsize=14)

# Leave room at the top for the group caption and the overall title.
fig.subplots_adjust(top=0.8)
fig.text(0.5, 0.01, "Tenor (in days)", ha="center", fontsize=14)
fig.text(0.5, 0.88, "Moneyness Groups", ha="center", fontsize=14)
_ = fig.suptitle(
    "Elasticity Estimates (Fixing Moneyness Groups)",
    ha="center",
    fontsize="xx-large",
    y=0.99,
)
fig.savefig(
    os.path.expanduser(
        "~/Research/OptionsVolumeVolatility/Paper/figures/elasticity_nonparametric_slice_moneyness.png"
    ),
    dpi=300,
)
</pre>
</div>


<div id="org80f8c24" class="figure">
<p><img src="./.ob-jupyter/78462eced0e3bc342ae301eb09ba37307d75dc5b.png" alt="Elasticity estimates with 99% confidence bands plotted against tenor, with one panel per moneyness group" />
</p>
</div>


<div class="org-src-container">
<pre class="src src-jupyter-python">alpha = 0.01
cv = norm.ppf(1 - alpha / 2)
ub = residual + cv * stderr_residual
lb = residual - cv * stderr_residual

fig, axes = plt.subplots(
    nrows=1, ncols=len(residual.columns), figsize=(16, 6), sharey=True
)
for ax, i in zip(axes, range(len(axes))):
    residual.iloc[:, [i]].plot(ax=ax, legend=False, color="black")
    ax.fill_between(
        ax.lines[0].get_xdata(),
        ub.iloc[:, [i]].values.ravel(),
        lb.iloc[:, [i]].values.ravel(),
        color=ax.lines[0].get_color(),
        alpha=0.2,
    )
    ax.grid(True)
    ax.set_xlabel(None)
    ax.set_xticks([0, 4, 7, 10])
    ax.set_xlim(left=0, right=11)
    ax.set_xticklabels(["Deep OTM", "OTM", "ATM", "ITM"], rotation=0, fontsize=8)
    ax.set_title(tenor_labels[i])
axes[0].set_ylabel("Elasticity Estimate", fontsize=14)

fig.subplots_adjust(top=0.8)
fig.text(
    0.5,
    0.01,
    "Moneyness (normalized - in units of standard deviations)",
    ha="center",
    fontsize=14,
)
fig.text(0.5, 0.88, "Tenor Groups", ha="center", fontsize=14)
_ = fig.suptitle(
    "Elasticity Estimates (Fixing Tenor Groups)",
    ha="center",
    fontsize="xx-large",
    y=0.99,
)
fig.savefig(
    os.path.expanduser(
        "~/Research/OptionsVolumeVolatility/Paper/figures/elasticity_nonparametric_slice_tenor.png"
    ),
    dpi=300,
)
</pre>
</div>



<div id="org7d694fd" class="figure">
<p><img src="./.ob-jupyter/ab661fdf6de7b758bc2bc9b10b82759fd56bad67.png" alt="Elasticity estimates with 99% confidence bands plotted against normalized moneyness, with one panel per tenor group" />
</p>
</div>


<div class="org-src-container">
<pre class="src src-jupyter-python">fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)

axes[0].plot(range(12), residual.iloc[:, 1], color='black')
axes[0].fill_between(
    axes[0].lines[0].get_xdata(),
    ub.iloc[:, [1]].values.ravel(),
    lb.iloc[:, [1]].values.ravel(),
    color=axes[0].lines[0].get_color(),
    alpha=0.2,
)
axes[0].grid(True, alpha=0.2)
axes[0].set_xlabel(None)
axes[0].set_xticks([0, 4, 7, 10])
axes[0].set_xticklabels(["Deep OTM", "OTM", "ATM", "ITM"], rotation=0, fontsize=8)
axes[0].set_xlim(left=0, right=11)
axes[1].plot(range(6), residual.iloc[6, :], color='black')
axes[1].fill_between(
    axes[1].lines[0].get_xdata(),
    ub.iloc[6, :].values.ravel(),
    lb.iloc[6, :].values.ravel(),
    color=axes[1].lines[0].get_color(),
    alpha=0.2,
)
axes[1].grid(True, alpha=0.2)
axes[1].set_xlabel(None)
axes[1].set_xticks([0, 1, 3, 5])
axes[1].set_xlim(left=0, right=5)
axes[1].set_xticklabels(["1", "30", "60", "120"], rotation=0, fontsize=8)
axes[0].set_xlabel('Moneyness')
axes[0].set_ylabel('Volume-Volatility Elasticity')
axes[1].set_xlabel('Tenor (days)')

fig.savefig(
    os.path.expanduser(
        "~/Research/OptionsVolumeVolatility/Paper/figures/elasticity_nonparametric_summary.png"
    ),
    dpi=300,
)
</pre>
</div>
</div>
</div>
<div id="outline-container-org876b545" class="outline-2">
<h2 id="org876b545"><span class="section-number-2">7.</span> Volume-Volatility Elasticity and Disagreement&#xa0;&#xa0;&#xa0;<span class="tag"><span class="figure">figure</span></span></h2>
<div class="outline-text-2" id="text-7">
<div class="org-src-container">
<pre class="src src-jupyter-python">import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.stats import norm as normal

# Figure-wide defaults: Open Sans typeface and regular-weight axis labels.
mpl.rcParams.update(
    {
        "font.family": "Open Sans",
        "axes.labelweight": "regular",
    }
)



def psi(x: np.array) -&gt; np.array:
    assert all(x &gt;= 0), "Negative number in array"
    return x * (normal.cdf(x) - 0.5) / normal.pdf(x)


def elasticity(rel_disag: np.array) -&gt; np.array:
    return 1 / (1 + psi(rel_disag))


# Evaluate psi and the implied elasticity on an even grid over [0, 1].
disag = np.linspace(0, 1, 100)
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
ax1.plot(disag, psi(disag))
# Raw string: "\p" is not a valid escape sequence in a normal string literal
# and triggers a warning on recent Python versions; the rendered label is
# unchanged.
ax1.set(xlabel="x", ylabel=r"$\psi(x)$")
ax1.grid(True)
ax2.plot(disag, elasticity(disag))
ax2.set(xlabel="Relative Disagreement", ylabel="Volume-Volatility Elasticity")
ax2.grid(True)
</pre>
</div>
</div>
</div>
</div>
<div id="postamble" class="status">
<p class="author">Author: Guilherme Salome</p>
<p class="date">Created: 2023-04-16 Sun 23:18</p>
</div>
</body>
</html>